*Close any log left open by an earlier run (errors are ignored if none is open), then start a fresh log for this script
capture log close
log using "${logpath}LIONS.log", replace

/*******************************************************************************
LIONS.do

This code creates statistics on US attorney specialization using LIONS data
*******************************************************************************/

*Load case cause-of-action data from the USAO National Caseload / LIONS Data (FY2021; source: https://www.justice.gov/usao/resources/foia-library/national-caseload-data/2021)
*The file is fixed-width; every field is read as a string at the column positions given below
infix str district     1-10   ///
      str caseid      11-20   ///
      str cause_act   21-24   ///
      str ID          25-34   ///
      str create_date 35-45   ///
      str create_user 46-75   ///
      str update_date 76-86   ///
      str update_user 87-116  ///
      using "${rawdatapath}gs_case_cause_act.txt", clear

*Only the case key (district, caseid) and the cause-of-action code are needed
drop ID create_date create_user update_date update_user

*Flag cause-of-action rows coded "FRHC" (health care fraud), then reduce to one row per case:
*hc is 1 if any of the case's cause-of-action rows is FRHC and 0 otherwise
generate hc = (cause_act == "FRHC")
collapse (max) hc, by(district caseid)

*Store the case-level flag in a temporary file for the merge with the assignment data
tempfile cases
save `cases', replace

*Load assignment data from the USAO National Caseload / LIONS Data (FY2021; source: https://www.justice.gov/usao/resources/foia-library/national-caseload-data/2021)
*The file is fixed-width; every field is read as a string at the column positions given below
infix str district      1-10    ///
      str caseid       11-20    ///
      str crthisid     21-30    ///
      str ID           31-40    ///
      str staffid      41-50    ///
      str position     51       ///
      str start_date   52-62    ///
      str end_date     63-73    ///
      str create_date  74-84    ///
      str create_user  85-114   ///
      str update_date 115-125   ///
      str update_user 126-155   ///
      using "${rawdatapath}gs_assignment.txt", clear

*Keep only lead attorney assignments (position code "L"), reduced to the identifying fields
keep if position == "L"
keep district caseid staffid position

*Repeated assignment records for the same case and staff member collapse to a single row
duplicates drop

*Attach the case-level health care fraud flag; many assignment rows can share one case
merge m:1 district caseid using `cases'

*Retain only assignments whose case was found in the case-type data
drop if _merge != 3

*Getting case counts for each attorney
*An assignment row with a blank staff ID does not identify an attorney; left in, all such
*rows would be pooled into one spurious "attorney" by the collapse below
drop if trim(staffid) == ""

*Each remaining row is one distinct (district, caseid, staffid) lead assignment
gen cases = 1

*Attorneys are identified by district AND staffid, mirroring the (district, caseid) case key
*used for the merge above.
*NOTE(review): this assumes staffid, like caseid, is only unique within a district, so that
*grouping on staffid alone could pool different attorneys from different districts.
*Confirm against the LIONS data dictionary; if staffid is nationally unique, group by staffid only.
collapse (sum) hc cases, by(district staffid)

*Keep only attorneys with at least one health care fraud case
keep if hc >= 1

*Distribution of the number of health care fraud cases per attorney
summarize hc, detail // Referenced in Section 6.2, Paragraph 4

*Share of each attorney's lead cases that are health care fraud
*(the sample is already restricted to hc >= 1 above, so no further condition is needed)
gen hc_share = hc/cases
summarize hc_share, detail // Referenced in Section 6.2, Paragraph 4

log close